#!/usr/bin/env bash

# Train STAViS on splits 1-3 in two configurations (--audiovisual True and
# --audiovisual ""), then evaluate both result directories.

# Fail on unset variables so a typo cannot silently yield malformed paths.
# `set -e` is deliberately NOT enabled: as before, every training and
# evaluation step is attempted even if an earlier one fails; failures are
# logged and reflected in the final exit status instead.
set -u

# Directory handed to main.py as --root_path.
readonly base_path='./experiments'

# Directory handed to eval_multiprocess.py as --base_path. Defaults to the
# location this script has always used; set STAVIS_EVAL_BASE_PATH to run on a
# different machine without editing the script.
readonly eval_base_path="${STAVIS_EVAL_BASE_PATH:-/media/Diskd/projects/STAViS_AV}"

# Number of steps (training or evaluation) that exited non-zero.
failed_steps=0

#######################################
# Runs a command; on a non-zero exit, reports it on stderr and counts it.
# Globals:   failed_steps (written)
# Arguments: $1 - human-readable label for the step
#            $2.. - command and its arguments
# Returns:   the exit status of the command
#######################################
run_step() {
  local label=$1
  shift
  local status=0
  "$@" || status=$?
  if (( status != 0 )); then
    printf 'FAILED (exit %d): %s\n' "${status}" "${label}" >&2
    failed_steps=$(( failed_steps + 1 ))
  fi
  return "${status}"
}

#######################################
# Launches one main.py training run for a single split.
# Globals:   base_path (read)
# Arguments: $1 - split number, used for --dataset, the result directory and
#                 the fold-list file names
#            $2 - value passed verbatim to --audiovisual ("True" or "")
#            $3 - result directory name placed under --result_path
# Returns:   the exit status of main.py
#######################################
train_split() {
  local split=$1
  local audiovisual=$2
  local result_dir=$3
  local fold_lists='./data/fold_lists'
  local pretrained='./data/pretrained_models'

  run_step "train split${split} (${result_dir})" \
    python main.py --gpu_devices 0,1 --batch_size 128 --n_threads 12 \
    --result_path "./${result_dir}/split${split}_results" \
    --dataset "split${split}" \
    --pool_layer "avgpool" \
    --learning_rate 0.01 \
    --temp_reduction 2 \
    --upsample_func "bilinear" \
    --explain "original audio visual pyramid, avgpool, dsam module" \
    --no_use_dsam_att \
    --no_use_dsam_multi \
    --no_use_transposeConv \
    --no_use_spatio_att \
    --audiovisual "${audiovisual}" --checkpoint 20 --n_epochs 60 \
    --root_path "${base_path}" \
    --pretrain_path "${pretrained}/resnet-50-kinetics.pth" \
    --audio_pretrain_path "${pretrained}/soundnet8.pth" \
    --annotation_path_coutrot1_train "${fold_lists}/Coutrot_db1_list_train_${split}_fps.txt" \
    --annotation_path_coutrot1_test "${fold_lists}/Coutrot_db1_list_test_${split}_fps.txt" \
    --annotation_path_coutrot2_train "${fold_lists}/Coutrot_db2_list_train_${split}_fps.txt" \
    --annotation_path_coutrot2_test "${fold_lists}/Coutrot_db2_list_test_${split}_fps.txt" \
    --annotation_path_summe_train "${fold_lists}/SumMe_list_train_${split}_fps.txt" \
    --annotation_path_summe_test "${fold_lists}/SumMe_list_test_${split}_fps.txt" \
    --annotation_path_etmd_train "${fold_lists}/ETMD_av_list_train_${split}_fps.txt" \
    --annotation_path_etmd_test "${fold_lists}/ETMD_av_list_test_${split}_fps.txt" \
    --annotation_path_avad_train "${fold_lists}/AVAD_list_train_${split}_fps.txt" \
    --annotation_path_avad_test "${fold_lists}/AVAD_list_test_${split}_fps.txt"
}

#######################################
# Runs eval_multiprocess.py on one results directory.
# Globals:   eval_base_path (read)
# Arguments: $1 - value for --eval_path
# Returns:   the exit status of eval_multiprocess.py
#######################################
evaluate() {
  local eval_path=$1

  run_step "evaluate ${eval_path}" \
    python eval_multiprocess.py --base_path "${eval_base_path}" \
    --eval_path "${eval_path}"
}

# Runs with --audiovisual True; results go to audiovisual_complete_test1.
for split in 1 2 3; do
  train_split "${split}" True audiovisual_complete_test1
done

# Runs with --audiovisual ""; results go to audiovisual_complete_test.
# NOTE(review): the empty string is presumably parsed as a false value by
# main.py, i.e. these would be the visual-only runs - confirm against main.py.
for split in 1 2 3; do
  train_split "${split}" '' audiovisual_complete_test
done

evaluate experiments/audiovisual_complete_test
evaluate experiments/audiovisual_complete_test1

if (( failed_steps > 0 )); then
  printf '%d step(s) failed; see messages above.\n' "${failed_steps}" >&2
fi

# Last command: the script's exit status is non-zero if any step failed.
(( failed_steps == 0 ))